Expression data and meta data is downloaded from GEO. Samples are quantile normalized and visualized with PCA and tSNE.
#load libraries
library(GEOquery)
Registered S3 method overwritten by 'dplyr':
method from
print.rowwise_df
Setting options('download.file.method.GEOquery'='auto')
Setting options('GEOquery.inmemory.gpl'=FALSE)
library(preprocessCore)
library(ggplot2)
library(Rtsne)
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(ggthemes)
#downlaod data from GEO
gse <- getGEO('GSE50588')[[1]]
Found 1 file(s)
GSE50588_series_matrix.txt.gz
trying URL 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE50nnn/GSE50588/matrix/GSE50588_series_matrix.txt.gz'
Content type 'application/x-gzip' length 14767110 bytes (14.1 MB)
==================================================
downloaded 14.1 MB
Parsed with column specification:
cols(
.default = col_double(),
ID_REF = [31mcol_character()[39m
)
See spec(...) for full column specifications.
File stored at:
/var/folders/vr/g6hydgcs71s5t3jc2xvcp_cm0000gn/T//RtmppNsuQ6/GPL10558.soft
3270 parsing failures.
row col expected actual file
29537 Unigene_ID 1/0/T/F/TRUE/FALSE Hs.571610 literal data
29538 Unigene_ID 1/0/T/F/TRUE/FALSE Hs.545780 literal data
29539 Unigene_ID 1/0/T/F/TRUE/FALSE Hs.554603 literal data
29540 Unigene_ID 1/0/T/F/TRUE/FALSE Hs.437179 literal data
29541 Unigene_ID 1/0/T/F/TRUE/FALSE Hs.128234 literal data
..... .......... .................. ......... ............
See problems(...) for more details.
#parse expression matrix
exp = gse@assayData$exprs
#quantile normalize expression matrix
norm_exp = normalize.quantiles(exp)
colnames(norm_exp) = colnames(exp)
rownames(norm_exp) = rownames(exp)
#parse metadata dataframe
meta = gse@phenoData@data
#compute PCs
pca_norm = prcomp(norm_exp)$rotation[,1:3]
#add relevant metadata to pca
pca_norm = cbind(pca_norm,data.frame(target_gene = meta[match(rownames(pca_norm),rownames(meta)),c("target gene:ch1")], stringsAsFactors = F))
ggplot(as.data.frame(pca_norm),aes(x = PC2, y = PC3, color = target_gene)) + geom_point()
#compute tsne
tsne_norm = Rtsne(t(norm_exp),dim = 3,perplexity = 6)$Y
colnames(tsne_norm) = c("tSNE1","tSNE2","tSNE3")
rownames(tsne_norm) = colnames(norm_exp)
#add relevant metadata to tsne
tsne_norm = cbind(tsne_norm,data.frame(target_gene = meta[match(rownames(tsne_norm),rownames(meta)),c("target gene:ch1")], stringsAsFactors = F))
ggplot(as.data.frame(tsne_norm),aes(x = tSNE1, y = tSNE2, color = target_gene)) + geom_point()
plot_ly(tsne_norm, x = ~tSNE1, y = ~tSNE2, z = ~tSNE3, color = ~target_gene)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
No scatter3d mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
No scatter3d mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Z-score samples and visualize
znorm_exp = t(scale(t(norm_exp), center = T, scale = T))
#compute tsne
tsne_znorm = Rtsne(t(znorm_exp),dim = 3,perplexity = 3,theta = 0.001)$Y
colnames(tsne_znorm) = c("tSNE1","tSNE2","tSNE3")
rownames(tsne_znorm) = colnames(znorm_exp)
#add relevant metadata to tsne
tsne_znorm = cbind(tsne_znorm,data.frame(target_gene = meta[match(rownames(tsne_znorm),rownames(meta)),c("target gene:ch1")], stringsAsFactors = F))
ggplot(as.data.frame(tsne_znorm),aes(x = tSNE1, y = tSNE2, color = target_gene)) + geom_point() + theme_few()
plot_ly(tsne_znorm, x = ~tSNE1, y = ~tSNE2, z = ~tSNE3, color = ~target_gene)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
No scatter3d mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
No scatter3d mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Generate Signatures
library(GeoDE)
Loading required package: Matrix
Loading required package: MASS
Attaching package: ‘MASS’
The following object is masked from ‘package:plotly’:
select
library(matrixStats)
Attaching package: ‘matrixStats’
The following objects are masked from ‘package:Biobase’:
anyMissing, rowMedians
library(limma)
#map Illumina probes to gene ids
library(illuminaHumanv4.db)
Loading required package: AnnotationDbi
Loading required package: stats4
Loading required package: IRanges
Loading required package: S4Vectors
Attaching package: ‘S4Vectors’
The following object is masked from ‘package:Matrix’:
expand
The following object is masked from ‘package:plotly’:
rename
The following object is masked from ‘package:base’:
expand.grid
Attaching package: ‘IRanges’
The following object is masked from ‘package:plotly’:
slice
Attaching package: ‘AnnotationDbi’
The following object is masked from ‘package:MASS’:
select
The following object is masked from ‘package:plotly’:
select
Loading required package: org.Hs.eg.db
gene_names = data.frame(Gene=unlist(mget(x = rownames(norm_exp),envir = illuminaHumanv4SYMBOL)))
#remove probes without mapped genes
norm_exp_df=as.data.frame(norm_exp)
norm_exp_df$gene_names = as.character(gene_names[,1])
norm_exp_df = norm_exp_df[!is.na(norm_exp_df$gene_names),]
rm(gene_names,norm_exp)
#where probes map to same genes, chose probes with greater variance
norm_exp_df = norm_exp_df[order(rowVars(as.matrix(norm_exp_df[,setdiff(colnames(norm_exp_df),"gene_names")])), decreasing = T),]
norm_exp_df = norm_exp_df[!duplicated(norm_exp_df$gene_names),]
# norm_exp_df = plyr::ddply(norm_exp_df,plyr::.(gene_names),function(exp_sub){
# if(nrow(exp_sub)>1){
# exp_sub_mat = exp_sub
# exp_sub_mat$gene_names = NULL
# exp_sub_mat = as.matrix(exp_sub_mat)
# return(exp_sub[which.max(rowVars(exp_sub_mat)),])
# }else{
# return(exp_sub)
# }
# })
rownames(norm_exp_df) = norm_exp_df$gene_names
norm_exp_df$gene_names = NULL
##write metadata and normalized expression to file
write.table(meta,"/Volumes/My Passport/ChromNet/Cusanovich/meta.tsv",row.names = T, col.names = T, sep = "\t", quote = T)
write.table(norm_exp_df, "/Volumes/My Passport/ChromNet/Cusanovich/norm_exp.tsv", row.names = T, col.names = T, sep = "\t", quote = F)
#pull out control expression
ctl_accessions = meta[meta$`target gene:ch1`=="NS",]$geo_accession
norm_ctl_exp = norm_exp_df[,colnames(norm_exp_df) %in% ctl_accessions]
ctl_class = rep(1,ncol(norm_ctl_exp))
#generate chardir signature for each TF knockdown
chardir_sigs = plyr::dlply(meta[meta$`target gene:ch1`!="NS",],
plyr::.(`target gene:ch1`), function(tf){
pert_accessions = tf$geo_accession
norm_pert_exp = norm_exp_df[,colnames(norm_exp_df) %in% pert_accessions]
pert_class = rep(2,ncol(norm_pert_exp))
norm_instance_exp = cbind(norm_ctl_exp,norm_pert_exp)
norm_instance_exp = cbind(data.frame(genenames = rownames(norm_instance_exp)),
norm_instance_exp)
instance_class = as.factor(c(ctl_class,pert_class))
return(chdirAnalysis(norm_instance_exp,
instance_class,
list(1),
CalculateSig = F)$results[[1]])
})
limma_sigs = plyr::dlply(meta[meta$`target gene:ch1`!="NS",],
plyr::.(`target gene:ch1`), function(tf){
pert_accessions = tf$geo_accession
norm_pert_exp = norm_exp_df[,colnames(norm_exp_df) %in% pert_accessions]
pert_class = rep("pert",ncol(norm_pert_exp))
norm_instance_exp = cbind(norm_ctl_exp,norm_pert_exp)
design = data.frame(Control = c(rep(1,ncol(norm_ctl_exp)),rep(0,ncol(norm_pert_exp))),
Perturbation = c(rep(0, ncol(norm_ctl_exp)), rep(1, ncol(norm_pert_exp))))
rownames(design) = colnames(norm_instance_exp)
fit = lmFit(norm_instance_exp, design=design)
cont.matrix = makeContrasts(PerturbationvsControl=Perturbation-Control, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit = eBayes(fit2)
return(topTable(fit, n = Inf))
})
limma_FC = plyr::ldply(limma_sigs,function(sig){
df = data.frame(t(sig$logFC))
colnames(df) = rownames(sig)
return(df)
})
rownames(limma_FC) = limma_FC$`target gene:ch1`
limma_FC$`target gene:ch1` = NULL
limma_FC = as.data.frame(t(limma_FC))
limma_adj_pvals = plyr::ldply(limma_sigs,function(sig){
df = data.frame(t(sig$P.Value))
colnames(df) = rownames(sig)
return(df)
})
rownames(limma_adj_pvals) = limma_adj_pvals$`target gene:ch1`
limma_adj_pvals$`target gene:ch1` = NULL
limma_adj_pvals = as.data.frame(t(limma_adj_pvals))
#write limma signature matrices to file
write.table(limma_FC,"/volumes/My Passport/ChromNet/Cusanovich/limma_sigs_FC.tsv", sep = "\t", quote = F, col.names = T, row.names = T)
write.table(limma_adj_pvals,"/volumes/My Passport/ChromNet/Cusanovich/limma_sigs_adj_pval.tsv", sep = "\t", quote = F, col.names = T, row.names = T)
Generate a signature matrix from the signature list
write.table(chardir_sigs_mat,"/Volumes/My Passport/Cusanovich/chardir_sigs_mat.tsv",sep = "\t", quote = F, row.names = T, col.names = T)
cannot open file '/Volumes/My Passport/Cusanovich/chardir_sigs_mat.tsv': No such file or directoryError in file(file, ifelse(append, "a", "w")) :
cannot open the connection